Submit an Rmd or Qmd and publish an html file on RPubs with the following:
Final datasets for creation of visualization
Join all datasets together
Calculate any remaining data elements needed for analysis
Show code used to create joined dataset, but please do not print full data frame output (showing data structure with str() is okay)
# Flu data only has quarters 2022.4 to 2023.2 Vax data has# quarters 2022.1 to 2023.3 Before joining, filter vax data# to only 2022.4 to 2023.2df_vax <- df_vax %>%filter(quarter >=2022.4& quarter <=2023.2)# perform inner_join - we don't want any data that doesn't# have a match in both datasetsdf_joined <-inner_join(df_flu, df_vax, by =join_by(age_category, county, quarter)) %>%select(!c(sum_dx_new, sum_severe_new)) %>%drop_na()# show data structurestr(df_joined)
tibble [678 × 6] (S3: tbl_df/tbl/data.frame)
$ age_category : chr [1:678] "0-17" "0-17" "0-17" "0-17" ...
$ county : chr [1:678] "Alameda County" "Alameda County" "Alameda County" "Amador County" ...
$ quarter : num [1:678] 2022 2023 2023 2022 2023 ...
$ dx_new_rate : num [1:678] 0.00493 0.03555 0.00671 0.00502 0.035 ...
$ severe_new_rate : num [1:678] 0.000751 0.000888 0.001812 0 0.001103 ...
$ mean_vaccination_rate: num [1:678] 0.578 0.592 0.596 0.286 0.287 ...
Visualizations (at least one per group member)
One print quality table as requested in scenario
One print quality plot or chart as requested in scenario
For groups of 3, one additional print quality table or plot of your choice (can support the requested data in the scenario, or answer a different question using the same data sources)
Research Question(s)
CAUTION: It’s crucial to recognize that this analysis pertains to aggregate-level data, and caution should be exercised in generalizing these findings to individual-level dynamics to avoid the ecological fallacy.
Identify if COVID vaccination rates reflect flu vaccination rates in populations and or if more should be done to encourage flu vaccinations
# COVID vaccination rates vs flu rate (new cases), per# quarter, color coded by county# Re-format facet labels for quartersqtr.labs <-c("Q4-2022", "Q1-2023", "Q2-2023")names(qtr.labs) <-c(2022.4, 2023.1, 2023.2)vax_flu_byqtr <- df_joined %>%group_by(county, quarter) %>%mutate(mean_vaccination_rate =mean(mean_vaccination_rate),dx_new_rate =mean(dx_new_rate)) %>%ggplot(aes(x = mean_vaccination_rate, y = dx_new_rate, color = county)) +geom_point() +geom_smooth(method = lm, color ="red") +theme_minimal() +scale_y_continuous(labels =function(x) round(x *100, digits =2)) +labs(x ="Mean COVID Vaccination Rate (fully vaccinated / population) ",y ="Mean Flu Rate (new diagnoses / 100 susceptible)",title ="COVID vaccination rate does not significantly reflect flu vaccination rate",subtitle ="based on quarterly COVID vaccination rate data compared to simulated flu incidence rate data per quarter across 57 California counties, from Q4-2022 to Q2-2023") +facet_wrap(~quarter, scales ="free", labeller =labeller(quarter = qtr.labs)) +theme(legend.position ="none", plot.title.position ="plot",plot.title =element_text(face ="bold"), plot.subtitle =element_text(face ="italic"),strip.text.x =element_text(face ="bold"), axis.title.y =element_text(margin =margin(r =5)),axis.title.x =element_text(margin =margin(t =5)))ggplotly(vax_flu_byqtr) %>%layout(title =list(text =paste0("COVID vaccination rate does not significantly reflect flu vaccination rate.","<br>", "<sup>", "Across 57 California counties, from Q4-2022 to Q2-2023","</sup>"), yref ="container", xref ="paper", yref ="paper"),annotations =list(text =paste0("<sup>", "Based on quarterly COVID vaccination rate data compared to simulated flu incidence rate data per quarter","</sup>"), showarrow = F, xref ="paper", yref ="paper",xanchor ="right", yanchor ="auto", xshift =350,yshift =-230), margin =list(t =100, b =110, l =100,r =30))
COVID vaccination rate vs flu rate interpretation: Analysis of the COVID vaccination rate data and simulated flu data indicates that COVID vaccination does not seem to strongly reflect flu vaccination within the 57 California counties from which data was collected/simulated. Additional measures to promote flu vaccination may be needed.
Identify where there is any correlation between COVID vaccination rates on flu rates and/or severity.
summary(df_joined)
age_category county quarter dx_new_rate
Length:678 Length:678 Min. :2022 Min. :0.003570
Class :character Class :character 1st Qu.:2022 1st Qu.:0.004953
Mode :character Mode :character Median :2023 Median :0.006699
Mean :2023 Mean :0.015761
3rd Qu.:2023 3rd Qu.:0.035506
Max. :2023 Max. :0.040653
severe_new_rate mean_vaccination_rate
Min. :0.000000 Min. :0.1409
1st Qu.:0.002828 1st Qu.:0.4624
Median :0.015257 Median :0.7096
Mean :0.038274 Mean :0.6512
3rd Qu.:0.050552 3rd Qu.:0.8336
Max. :0.428571 Max. :1.1377
# Create categories based on COVID vaccination rates and# severe flu ratesdata <- df_joined %>%mutate(COVID_Vaccination_Rate =ifelse(mean_vaccination_rate <=median(mean_vaccination_rate), "Low Vaccination Rate (Min = 0.14)","High Vaccination Rate (Max = 1.14)"), Severe_Flu_Rate =ifelse(severe_new_rate <=median(severe_new_rate), "Low Severe Flu Rate (Min = 0.00)","High Severe Flu Rate (Max = 0.43)"))# Calculate the correlation for each combinationcorrelation_table <- data %>%group_by(COVID_Vaccination_Rate, Severe_Flu_Rate) %>%summarise(Correlation =round(cor(mean_vaccination_rate, severe_new_rate), 2))# Rename columnscolnames(correlation_table) <-c("COVID Vaccination Rate", "Mean Severe Flu Rate","Correlation")# Print the correlation table with kableExtrakable(correlation_table, format ="html", caption ="No clear correlation between COVID vaccination rates and mean severe flu rate, Q4-2022 - Q2-2023") %>%kable_styling(full_width =FALSE)
No clear correlation between COVID vaccination rates and mean severe flu rate, Q4-2022 - Q2-2023
COVID Vaccination Rate
Mean Severe Flu Rate
Correlation
High Vaccination Rate (Max = 1.14)
High Severe Flu Rate (Max = 0.43)
0.10
High Vaccination Rate (Max = 1.14)
Low Severe Flu Rate (Min = 0.00)
-0.01
Low Vaccination Rate (Min = 0.14)
High Severe Flu Rate (Max = 0.43)
0.13
Low Vaccination Rate (Min = 0.14)
Low Severe Flu Rate (Min = 0.00)
0.45
Correlation table interpretation: Correlation values per each case provide insights into the relationships between COVID vaccination rates and severe flu cases rates in different scenarios, ranging from weak negative to moderate positive correlations. There is a significant weak negative correlation between the high vaccination rate group and low severe flu rate which might be impacted by confounders.
Does COVID vaccination affect flu severity within each age group?
# Visualization: One print quality plot or chart as# requested in scenario Using Both a table & a chart seem# appropriate for this specific research question Rename# joined dfage_group_vax_vs_flu_severity <- df_joined# Rename dfage_group_vax_vs_flu_severity_table <- age_group_vax_vs_flu_severity# Create a summary tablesummary_table <- age_group_vax_vs_flu_severity_table %>%group_by(age_category) %>%summarize(mean_vaccination_rate =mean(mean_vaccination_rate),mean_flu_severity =mean(severe_new_rate))# Rename summary table datacolnames(summary_table) <-c("Age Category", "Mean COVID Vaccination Rate","Mean Severe Flu Rate")# Print the table using kableExtra (Quality table #1)kable(summary_table, "html", caption ="High COVID vaccination rate does not necessarily imply low severe flu rate") %>%kable_styling(full_width =FALSE)
High COVID vaccination rate does not necessarily imply low severe flu rate
Age Category
Mean COVID Vaccination Rate
Mean Severe Flu Rate
0-17
0.3537977
0.0012448
18-49
0.6444651
0.0081145
50-64
0.7660298
0.0392774
65+
0.8302451
0.1031616
# Quality plot# Scatter plot with facets for each age categoryggplot(data = age_group_vax_vs_flu_severity, aes(x = mean_vaccination_rate,y = severe_new_rate, color = mean_vaccination_rate)) +geom_point(size =3) +labs(title ="High COVID vaccination rate does not necessarily imply low severe flu rate",subtitle ="Scatter Plots by Age Categories", x ="Mean Vaccination Rate",y ="Flu Severity Rate", color ="Mean Vaccination Rate") +geom_smooth(method = lm, color ="yellow") +scale_color_gradient(low ="blue",high ="red") +facet_wrap(~age_category, scales ="free") +theme_minimal()
COVID vaccination versus flu severity by age group interpretation: The data indicates an age-related uptrend in vaccination rates among the older demographics. Intriguingly, despite heightened vaccination coverage in these age groups, there is a simultaneous upswing in average flu severity.